Import libraries

In [1]:
import pandas as pd
from datetime import datetime
import plotly.express as px
import os, glob

Set folder path on server

In [2]:
# Root folder on the server where every extract is stored.
folder_path = "/home/pi/HDD/Datasets"

# Templates whose placeholders are filled in later with str.format().
file_path = "{folder_path}/Covid19_MoHFW_{extract_date}.csv"
html_path = "/home/pi/HDD/webpages/charts/MoHFW{html_file}.html"

# Consolidated dataset, written into the same folder as the extracts.
final_csv_path = f"{folder_path}/Covid19_MoHFW.csv"

Read data from website

In [3]:
# Load the MoHFW JSON feed straight from its URL into a DataFrame.
mohfw_json_url = "https://www.mohfw.gov.in/data/datanew.json"
df = pd.read_json(mohfw_json_url)

Clean data.

  • Convert serial number to number
  • Remove asterisks from name
  • Add datetime field to dataset
In [4]:
# Stamp every row with the moment of this extraction, kept as text
# in "YYYY-MM-DD HH:MM:SS" form.
extract_date = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
df["extract_date"] = extract_date

Save the dataset to a CSV file on the server

In [5]:
# Build the timestamped file name from the template WITHOUT overwriting the
# template: once file_path is rebound to the formatted result it has no
# placeholders left, so re-running this cell after a fresh extract would
# silently reuse the previous file name and overwrite that extract.
# "YYYY-MM-DD HH:MM:SS" -> "YYYYMMDDHHMMSS"
extract_stamp = extract_date.translate(str.maketrans("", "", "- :"))
extract_file_path = file_path.format(folder_path=folder_path, extract_date=extract_stamp)
df.to_csv(extract_file_path, index=False)

Load df

  • read csv files into df
  • take data from latest extract time for each date
  • fix Telangana name issue
  • export consolidated dataset
In [6]:
# Drop the single-extract frame before loading the full history.
del df

# Read only the per-extract files (Covid19_MoHFW_<timestamp>.csv). A bare
# "*.csv" also matches the consolidated Covid19_MoHFW.csv that is written into
# this same folder further down, so every run after the first would re-ingest
# its own output and duplicate rows (and would also pick up any unrelated CSV
# stored in the folder).
extract_files = sorted(glob.glob(os.path.join(folder_path, "Covid19_MoHFW_*.csv")))

# ignore_index gives the combined frame a clean 0..n-1 index instead of the
# repeated per-file row numbers.
df = pd.concat(map(pd.read_csv, extract_files), ignore_index=True)
In [7]:
# The calendar day is the first 10 characters of the timestamp text.
df["date"] = df["extract_date"].str.slice(stop=10)

# Latest extract timestamp recorded for each day.
latest_per_day = df.groupby(["date"])["extract_date"].max()
df_dates = latest_per_day.reset_index()

# Keep only the rows that belong to one of those latest extracts.
is_latest_extract = df["extract_date"].isin(df_dates["extract_date"])
df = df[is_latest_extract]
In [8]:
# Clean the state names: strip footnote asterisks, then fix the "Telengana"
# misspelling. The result is assigned back explicitly because
# df["state_name"].replace(..., inplace=True) operates on a column selection of
# a filtered frame: pandas warns about it and, with Copy-on-Write enabled, the
# change never reaches df. The pattern is a raw string because "\*" is an
# invalid escape sequence in a normal string literal.
state_name_clean = (
    df["state_name"]
    .replace(to_replace=r"\*", value="", regex=True)
    .replace(to_replace="Telengana", value="Telangana")
)

# total = active + death + cured, with missing values counted as 0.
total_cases = df["active"].fillna(0) + df["death"].fillna(0) + df["cured"].fillna(0)

# assign() returns a new frame, so nothing is written into a slice of the
# previous df.
df = df.assign(state_name=state_name_clean, total=total_cases)
df = df.sort_values(by=["state_name", "extract_date"])

# Export the consolidated dataset.
df.to_csv(final_csv_path, index=False)

Graphs

In [9]:
# Active cases over time for the selected states, one coloured line per state.
selected_states = ["Karnataka", "Maharashtra", "Kerala", "Andhra Pradesh", "Telangana", "Tamil Nadu"]
plot_df = df[df["state_name"].isin(selected_states)]
fig = px.scatter(
    plot_df,
    x="extract_date",
    y=["active"],
    title='Active Cases',
    color="state_name",
)
fig = fig.update_traces(mode='lines+markers')

# Save a standalone HTML copy of the chart, then render it in the notebook.
fig.write_html(html_path.format(html_file="ActiveCases"))
fig.show()
In [10]:
# Deaths over time for the selected states, one coloured line per state.
selected_states = ["Karnataka", "Maharashtra", "Kerala", "Andhra Pradesh", "Telangana", "Tamil Nadu"]
plot_df = df[df["state_name"].isin(selected_states)]
fig = px.scatter(
    plot_df,
    x="extract_date",
    y=["death"],
    title='Deaths',
    color="state_name",
)
fig = fig.update_traces(mode='lines+markers')

# Save a standalone HTML copy of the chart, then render it in the notebook.
fig.write_html(html_path.format(html_file="Deaths"))
fig.show()
In [11]:
# Total cases over time for the selected states, drawn on a logarithmic y axis.
selected_states = ["Karnataka", "Maharashtra", "Kerala", "Andhra Pradesh", "Telangana", "Tamil Nadu"]
plot_df = df[df["state_name"].isin(selected_states)]
fig = px.scatter(
    plot_df,
    x="extract_date",
    y=["total"],
    title='Total Cases log scale',
    color="state_name",
    log_y=True,
)
fig = fig.update_traces(mode='lines+markers')

# Save a standalone HTML copy of the chart, then render it in the notebook.
fig.write_html(html_path.format(html_file="TotalCasesLogScale"))
fig.show()
In [12]:
# Deaths over time for the selected states, drawn on a logarithmic y axis.
selected_states = ["Karnataka", "Maharashtra", "Kerala", "Andhra Pradesh", "Telangana", "Tamil Nadu"]
plot_df = df[df["state_name"].isin(selected_states)]
fig = px.scatter(
    plot_df,
    x="extract_date",
    y=["death"],
    title='Total Deaths log scale',
    color="state_name",
    log_y=True,
)
fig = fig.update_traces(mode='lines+markers')

# Save a standalone HTML copy of the chart, then render it in the notebook.
fig.write_html(html_path.format(html_file="TotalDeathsLogScale"))
fig.show()